{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "pycharm": {
     "is_executing": false
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from easydict import EasyDict\n",
    "from scipy.io import loadmat\n",
    "\n",
    "np.random.seed(0)\n",
    "random.seed(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "pa100k_dir = './data/PA100k/'\n",
    "peta_dir = './data/PETA/'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   Female  AgeOver60  Age18-60  AgeLess18  Front  Side  Back  Hat  Glasses  \\\n",
      "0       1          0         1          0      1     0     0    0        0   \n",
      "1       0          0         1          0      0     0     1    0        0   \n",
      "2       1          0         1          0      0     1     0    0        0   \n",
      "3       0          0         1          0      0     0     1    0        0   \n",
      "4       1          0         1          0      0     1     0    0        0   \n",
      "\n",
      "   HandBag  ...  UpperPlaid  UpperSplice  LowerStripe  LowerPattern  LongCoat  \\\n",
      "0        0  ...           0            0            0             0         0   \n",
      "1        0  ...           0            0            0             0         0   \n",
      "2        0  ...           0            0            0             0         0   \n",
      "3        0  ...           0            0            0             0         0   \n",
      "4        1  ...           0            0            0             0         0   \n",
      "\n",
      "   Trousers  Shorts  Skirt&Dress  boots    image_id  \n",
      "0         1       0            0      0  000001.jpg  \n",
      "1         1       0            0      0  000002.jpg  \n",
      "2         1       0            0      0  000003.jpg  \n",
      "3         0       1            0      0  000004.jpg  \n",
      "4         1       0            0      0  000005.jpg  \n",
      "\n",
      "[5 rows x 27 columns]\n"
     ]
    }
   ],
   "source": [
    "\"\"\"\n",
    "create a PA100K peta_dataset description file, which consists of images, labels\n",
    "\"\"\"\n",
    "pa100k_data = loadmat(os.path.join(pa100k_dir, 'annotation.mat'))\n",
    "\n",
    "pa100k_dataset = EasyDict()\n",
    "pa100k_dataset.description = 'pa100k'\n",
    "# pa100k_dataset.root = os.path.join(pa100k_dir, 'data')\n",
    "\n",
    "train_image_name = [pa100k_data['train_images_name'][i][0][0] for i in range(80000)]\n",
    "val_image_name = [pa100k_data['val_images_name'][i][0][0] for i in range(10000)]\n",
    "test_image_name = [pa100k_data['test_images_name'][i][0][0] for i in range(10000)]\n",
    "pa100k_dataset.image_name = train_image_name + val_image_name + test_image_name\n",
    "\n",
    "pa100k_dataset.label = np.concatenate(\n",
    "    (pa100k_data['train_label'], pa100k_data['val_label'], pa100k_data['test_label']), axis=0)\n",
    "assert pa100k_dataset.label.shape == (100000, 26)\n",
    "pa100k_dataset.attr_name = [pa100k_data['attributes'][i][0][0] for i in range(26)]\n",
    "\n",
    "pa100k_df = pd.DataFrame(data=pa100k_dataset.label,\n",
    "                         index=[i for i in range(pa100k_dataset.label.shape[0])],\n",
    "                         columns=pa100k_dataset.attr_name)\n",
    "\n",
    "pa100k_df['image_id'] = pa100k_dataset.image_name\n",
    "print(pa100k_df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "pycharm": {
     "is_executing": true,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   personalLess30  personalLess45  personalLess60  personalLarger60  \\\n",
      "0               0               0               1                 0   \n",
      "1               0               1               1                 0   \n",
      "2               0               1               1                 0   \n",
      "3               0               1               1                 0   \n",
      "4               1               0               1                 0   \n",
      "\n",
      "   carryingBackpack  carryingOther  lowerBodyCasual  upperBodyCasual  \\\n",
      "0                 0              1                1                0   \n",
      "1                 0              0                1                0   \n",
      "2                 0              0                1                0   \n",
      "3                 0              0                1                0   \n",
      "4                 0              0                1                0   \n",
      "\n",
      "   lowerBodyFormal  upperBodyFormal  ...  carryingBrown  carryingGreen  \\\n",
      "0                0                0  ...              0              0   \n",
      "1                0                0  ...              0              0   \n",
      "2                0                0  ...              0              0   \n",
      "3                0                0  ...              0              0   \n",
      "4                0                0  ...              1              0   \n",
      "\n",
      "   carryingGrey  carryingOrange  carryingPink  carryingPurple  carryingRed  \\\n",
      "0             0               0             0               0            0   \n",
      "1             0               0             0               0            0   \n",
      "2             0               0             0               0            0   \n",
      "3             0               0             0               0            0   \n",
      "4             0               0             0               0            0   \n",
      "\n",
      "   carryingWhite  carryingYellow    image_id  \n",
      "0              0               0  000001.jpg  \n",
      "1              0               0  000002.jpg  \n",
      "2              0               0  000003.jpg  \n",
      "3              0               0  000004.jpg  \n",
      "4              0               0  000005.jpg  \n",
      "\n",
      "[5 rows x 117 columns]\n"
     ]
    }
   ],
   "source": [
    "annotated_pa100k_df = pd.read_csv(os.path.join(pa100k_dir, 'program_annotated_zero_one_pa100k.csv'))\n",
    "\n",
    "annotated_pa100k_df = annotated_pa100k_df.drop(annotated_pa100k_df.columns[0], axis=1)\n",
    "\n",
    "annotated_pa100k_df['accessoryHat'] = pa100k_df['Hat']\n",
    "annotated_pa100k_df['accessorySunglasses'] = pa100k_df['Glasses']\n",
    "annotated_pa100k_df['carryingBackpack'] = pa100k_df['Backpack']\n",
    "annotated_pa100k_df['carryingFolder'] = pa100k_df['HandBag']\n",
    "annotated_pa100k_df['carryingMessengerBag'] = pa100k_df['ShoulderBag']\n",
    "annotated_pa100k_df['carryingOther'] = pa100k_df['HoldObjectsInFront']\n",
    "annotated_pa100k_df['footwearBoots'] = pa100k_df['boots']\n",
    "annotated_pa100k_df['lowerBodyShortSkirt'] = pa100k_df['Skirt&Dress']\n",
    "annotated_pa100k_df['lowerBodyShorts'] = pa100k_df['Shorts']\n",
    "annotated_pa100k_df['lowerBodyTrousers'] = pa100k_df['Trousers']\n",
    "annotated_pa100k_df['personalLarger60'] = pa100k_df['AgeOver60']\n",
    "annotated_pa100k_df['personalLess15'] = pa100k_df['AgeLess18']\n",
    "annotated_pa100k_df['personalLess60'] = pa100k_df['Age18-60']\n",
    "annotated_pa100k_df['upperBodyCasual'] = pa100k_df['UpperPlaid']\n",
    "annotated_pa100k_df['upperBodyFormal'] = pa100k_df['LongCoat']\n",
    "annotated_pa100k_df['upperBodyLogo'] = pa100k_df['UpperLogo']\n",
    "annotated_pa100k_df['upperBodyLongSleeve'] = pa100k_df['LongSleeve']\n",
    "annotated_pa100k_df['upperBodyOther'] = pa100k_df['UpperStride'] | pa100k_df['UpperSplice']\n",
    "annotated_pa100k_df['upperBodyShortSleeve'] = pa100k_df['ShortSleeve']\n",
    "\n",
    "print(annotated_pa100k_df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100000, 117)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "annotated_pa100k_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "create a PETA peta_dataset description file, which consists of images, labels\n",
    "\"\"\"\n",
    "peta_data = loadmat(os.path.join(peta_dir, 'PETA_added.mat'))\n",
    "\n",
    "peta_dataset = EasyDict()\n",
    "peta_dataset.description = 'peta'\n",
    "peta_dataset.reorder = 'group_order'\n",
    "peta_dataset.root = os.path.join(peta_dir, 'images')\n",
    "peta_dataset.image_name = [f'{100000 + i + 1:06}.png' for i in range(19000)]\n",
    "\n",
    "raw_attr_name = [i[0][0].strip() for i in peta_data['peta'][0][0][1]]\n",
    "# (19000, 105)\n",
    "raw_label = peta_data['peta'][0][0][0][:, 4:]\n",
    "\n",
    "\n",
    "# (1900, 116)\n",
    "peta_dataset.label = raw_label\n",
    "peta_dataset.attr_name = raw_attr_name\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   personalLess30  personalLess45  personalLess60  personalLarger60  \\\n",
      "0               1               0               0                 0   \n",
      "1               1               0               0                 0   \n",
      "2               1               0               0                 0   \n",
      "3               1               0               0                 0   \n",
      "4               1               0               0                 0   \n",
      "\n",
      "   carryingBackpack  carryingOther  lowerBodyCasual  upperBodyCasual  \\\n",
      "0                 0              0                1                0   \n",
      "1                 0              0                1                0   \n",
      "2                 0              0                1                0   \n",
      "3                 0              0                1                1   \n",
      "4                 0              0                1                1   \n",
      "\n",
      "   lowerBodyFormal  upperBodyFormal  ...  carryingBrown  carryingGreen  \\\n",
      "0                0                0  ...              0              0   \n",
      "1                0                0  ...              0              0   \n",
      "2                0                0  ...              0              0   \n",
      "3                0                0  ...              0              0   \n",
      "4                0                0  ...              0              0   \n",
      "\n",
      "   carryingGrey  carryingOrange  carryingPink  carryingPurple  carryingRed  \\\n",
      "0             0               0             0               0            0   \n",
      "1             0               0             0               0            0   \n",
      "2             0               0             0               0            0   \n",
      "3             0               0             0               0            0   \n",
      "4             0               0             0               0            0   \n",
      "\n",
      "   carryingWhite  carryingYellow    image_id  \n",
      "0              0               0  100001.png  \n",
      "1              0               0  100002.png  \n",
      "2              0               0  100003.png  \n",
      "3              0               0  100004.png  \n",
      "4              0               0  100005.png  \n",
      "\n",
      "[5 rows x 117 columns]\n"
     ]
    }
   ],
   "source": [
    "peta_df = pd.DataFrame(data=peta_dataset.label,\n",
    "                         index=[i for i in range(peta_dataset.label.shape[0])],\n",
    "                         columns=peta_dataset.attr_name)\n",
    "\n",
    "peta_df['image_id'] = peta_dataset.image_name\n",
    "print(peta_df.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(19000, 117)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "peta_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert (annotated_pa100k_df.columns == peta_df.columns).all()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rename Peta image files to start from 100K onwards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'./data/PETA/images'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "peta_dataset.root"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./data/PETA/images\\00001.png\n",
      "./data/PETA/images\\100001.png\n",
      "./data/PETA/images\\00002.png\n",
      "./data/PETA/images\\100002.png\n",
      "./data/PETA/images\\00003.png\n",
      "./data/PETA/images\\100003.png\n",
      "./data/PETA/images\\00004.png\n",
      "./data/PETA/images\\100004.png\n",
      "./data/PETA/images\\00005.png\n",
      "./data/PETA/images\\100005.png\n",
      "./data/PETA/images\\00006.png\n",
      "./data/PETA/images\\100006.png\n",
      "./data/PETA/images\\00007.png\n",
      "./data/PETA/images\\100007.png\n",
      "./data/PETA/images\\00008.png\n",
      "./data/PETA/images\\100008.png\n",
      "./data/PETA/images\\00009.png\n",
      "./data/PETA/images\\100009.png\n",
      "./data/PETA/images\\00010.png\n",
      "./data/PETA/images\\100010.png\n"
     ]
    }
   ],
   "source": [
    "i= 0\n",
    "\n",
    "for count, filename in enumerate(os.listdir(peta_dataset.root)): \n",
    "    if i < 10:\n",
    "        new_name = f'{100000 + i + 1:06}.png'\n",
    "\n",
    "        src = os.path.join(peta_dataset.root, filename) \n",
    "        dst = os.path.join(peta_dataset.root, new_name) \n",
    "\n",
    "        print(src)\n",
    "        print(dst)\n",
    "        # rename() function will \n",
    "        # rename all the files \n",
    "    #     os.rename(src, dst) \n",
    "        i = i + 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "metadata": {
     "collapsed": false
    },
    "source": []
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
